import pandas as pd



def get_city(df, lm):
    """Add a ``所属城市`` column to ``df`` and back-fill ``所属省份`` from it.

    The province column is produced first by ``get_province``. Each address
    in ``df[lm]`` is then resolved to a city in two steps:

    1. Look for a shortened city name (``市`` / ``自治州`` / ``地区`` removed)
       inside the address; the first reference city that matches wins.
    2. Otherwise look for a shortened district name inside the address. A
       single matching district yields its city directly. Several matching
       districts are narrowed to the first one whose shortened province
       name also occurs in the address.

    Finally, rows that have a city but no province get the province that the
    ``市`` sheet lists for that city.

    Args:
        df: DataFrame holding the addresses; it is modified in place.
        lm: Name of the column in ``df`` that contains the address text.

    Returns:
        ``df`` with the ``所属省份`` and ``所属城市`` columns set. Rows whose
        address is missing, not a string, or not resolvable hold a missing
        value.
    """
    df = get_province(df, lm)

    # Both sheets live in the same workbook, so parse it only once.
    sheets = pd.read_excel('地区划码（省市区街道）.xlsx', sheet_name=['市', '区县'])
    df_shi = sheets['市']
    df_district = sheets['区县']

    def usable(name):
        # NaN cells would raise in ``name in address`` and an empty string is
        # contained in every address, so neither may act as a search key.
        return isinstance(name, str) and bool(name)

    # NOTE(review): autonomous prefectures keep their ethnic designator here
    # (e.g. "延边朝鲜族"), so short spellings of those are presumably missed.
    city_short = df_shi['市'].str.replace(r'市|自治州|地区', '', regex=True)
    city_index = {
        short: full
        for short, full in zip(city_short, df_shi['市'])
        if usable(short)
    }

    district_short = df_district['区'].str.replace(
        r'产业开发区|自治县|经济开发区', '', regex=True)
    # Drop the ethnic designator together with "自治区" so that the common
    # short form (e.g. "广西") is what gets searched for in the address.
    province_short = df_district['省'].str.replace(
        r'(?:壮族|回族|维吾尔)?自治区|特别行政区|省|市', '', regex=True)
    district_rows = [
        (short, province, city)
        for short, province, city in zip(
            district_short, province_short, df_district['市'])
        if usable(short)
    ]

    def city_from_district(dz):
        hits = [
            (province, city)
            for short, province, city in district_rows
            if short in dz
        ]
        if len(hits) == 1:
            return hits[0][1]
        # Zero hits fall straight through; several hits need the province
        # to decide which same-named district is meant.
        for province, city in hits:
            if usable(province) and province in dz:
                return city
        return None

    def resolve_city(dz):
        if not isinstance(dz, str):
            return None
        for short, full in city_index.items():
            if short in dz:
                return full
        return city_from_district(dz)

    df["所属城市"] = df[lm].apply(resolve_city)

    province_map = df_shi.set_index('市')['省'].to_dict()

    def backfill_province(province, city):
        # Keep a province that was already found, and do nothing when there
        # is no city to infer it from.
        if pd.notna(province) or pd.isna(city):
            return province
        return province_map.get(city, province)

    # A plain list is assigned positionally, so this also works when the
    # index of ``df`` contains duplicate labels.
    df['所属省份'] = [
        backfill_province(province, city)
        for province, city in zip(df['所属省份'], df['所属城市'])
    ]
    return df


def get_province(df, lm):
    """Add a ``所属省份`` column to ``df`` with the province of each address.

    Province names are read from the ``省`` sheet of the reference workbook
    and shortened by removing administrative suffixes (``省``, ``市``,
    ``自治区`` including a preceding ``壮族`` / ``回族`` / ``维吾尔``
    designator, and ``特别行政区``). The first reference province whose
    shortened name occurs in the address is reported by its full name as
    listed in the sheet.

    Args:
        df: DataFrame holding the addresses; it is modified in place.
        lm: Name of the column in ``df`` that contains the address text.

    Returns:
        ``df`` with the ``所属省份`` column set. Rows whose address is
        missing, not a string, or names no known province hold ``None``.
    """
    df_province = pd.read_excel('地区划码（省市区街道）.xlsx', sheet_name='省')
    full_names = df_province['省']
    # Removing only "自治区" would leave keys such as "广西壮族" that never
    # match the usual spelling "广西...", so the designator is removed too.
    short_names = full_names.str.replace(
        r'(?:壮族|回族|维吾尔)?自治区|特别行政区|省|市', '', regex=True)
    # Skip NaN cells (they would raise in ``short in dz``) and empty keys
    # (an empty string is contained in every address).
    province_index = {
        short: full
        for short, full in zip(short_names, full_names)
        if isinstance(short, str) and short
    }

    def extract_province_name(dz):
        # Missing cells arrive as None/NaN and cannot be searched.
        if not isinstance(dz, str):
            return None
        for short, full in province_index.items():
            if short in dz:
                return full
        return None

    df["所属省份"] = df[lm].apply(extract_province_name)
    return df


def get_district(df, lm):
    """Add ``所属区县`` to ``df``, then the city and province columns.

    District names come from the ``区县`` sheet of the reference workbook and
    are shortened by removing ``产业开发区``, ``自治县`` and ``经济开发区``.
    For each address in ``df[lm]``:

    * exactly one shortened district name occurring in the address yields
      that district;
    * several occurring names are narrowed to the first one whose shortened
      province name also occurs in the address;
    * anything else (no match, unresolved ambiguity, missing or non-string
      address) yields ``None``.

    Afterwards ``get_city`` is called to add the city and province columns.

    Args:
        df: DataFrame holding the addresses; it is modified in place.
        lm: Name of the column in ``df`` that contains the address text.

    Returns:
        The DataFrame returned by ``get_city``, carrying ``所属区县`` in
        addition to the columns that function adds.
    """
    df_district = pd.read_excel('地区划码（省市区街道）.xlsx', sheet_name='区县')

    def usable(name):
        # NaN cells would raise in ``name in address`` and an empty string is
        # contained in every address, so neither may act as a search key.
        return isinstance(name, str) and bool(name)

    district_short = df_district['区'].str.replace(
        r'产业开发区|自治县|经济开发区', '', regex=True)
    # Drop the ethnic designator together with "自治区" so that the common
    # short form (e.g. "广西") is what gets searched for in the address.
    province_short = df_district['省'].str.replace(
        r'(?:壮族|回族|维吾尔)?自治区|特别行政区|省|市', '', regex=True)
    district_rows = [
        (short, province, full)
        for short, province, full in zip(
            district_short, province_short, df_district['区'])
        if usable(short)
    ]

    def resolve_district(dz):
        if not isinstance(dz, str):
            return None
        hits = [
            (province, full)
            for short, province, full in district_rows
            if short in dz
        ]
        if len(hits) == 1:
            return hits[0][1]
        # Several same-named districts: the province decides. Resolving this
        # in one pass avoids a placeholder value that DataFrame.update would
        # leave behind whenever the second lookup found nothing.
        for province, full in hits:
            if usable(province) and province in dz:
                return full
        return None

    df["所属区县"] = df[lm].apply(resolve_district)
    df = get_city(df, lm)
    return df


if __name__ == "__main__":
    # Manual smoke run: load the enterprise sheet, tag every row's
    # 'address' with its province and print the resulting frame.
    enterprises = pd.read_excel('商务部直销企业数据.xlsx', sheet_name='直销企业')
    print(get_province(enterprises, 'address'))
